library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
daily = read.csv('Foreign_Exchange_Rates.csv')[2:24]
daily
names(daily)
## [1] "Time.Serie"
## [2] "AUSTRALIA...AUSTRALIAN.DOLLAR.US."
## [3] "EURO.AREA...EURO.US."
## [4] "NEW.ZEALAND...NEW.ZELAND.DOLLAR.US."
## [5] "UNITED.KINGDOM...UNITED.KINGDOM.POUND.US."
## [6] "BRAZIL...REAL.US."
## [7] "CANADA...CANADIAN.DOLLAR.US."
## [8] "CHINA...YUAN.US."
## [9] "HONG.KONG...HONG.KONG.DOLLAR.US."
## [10] "INDIA...INDIAN.RUPEE.US."
## [11] "KOREA...WON.US."
## [12] "MEXICO...MEXICAN.PESO.US."
## [13] "SOUTH.AFRICA...RAND.US."
## [14] "SINGAPORE...SINGAPORE.DOLLAR.US."
## [15] "DENMARK...DANISH.KRONE.US."
## [16] "JAPAN...YEN.US."
## [17] "MALAYSIA...RINGGIT.US."
## [18] "NORWAY...NORWEGIAN.KRONE.US."
## [19] "SWEDEN...KRONA.US."
## [20] "SRI.LANKA...SRI.LANKAN.RUPEE.US."
## [21] "SWITZERLAND...FRANC.US."
## [22] "TAIWAN...NEW.TAIWAN.DOLLAR.US."
## [23] "THAILAND...BAHT.US."
names(daily) = c('Date', 'Australia', 'Euro', 'NewZealand', 'UK', 'Brazil', 'Canada', 'China', 'HongKong', 'India', 'Korea', 'Mexico', 'SouthAfrica', 'Singapore', 'Denmark', 'Japan', 'Malaysia', 'Norway', 'Sweden', 'SriLanka', 'Switzerland', 'Taiwan', 'Thailand')
daily$Date = as.Date(daily$Date, format = "%Y-%m-%d")
head(daily)
sapply(daily, class)
## Date Australia Euro NewZealand UK Brazil
## "Date" "character" "character" "character" "character" "character"
## Canada China HongKong India Korea Mexico
## "character" "character" "character" "character" "character" "character"
## SouthAfrica Singapore Denmark Japan Malaysia Norway
## "character" "character" "character" "character" "character" "character"
## Sweden SriLanka Switzerland Taiwan Thailand
## "character" "character" "character" "character" "character"
daily[,2:23] = sapply(daily[2:23], as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
# Calculate NA Values
colSums(is.na.data.frame(daily))
## Date Australia Euro NewZealand UK Brazil
## 0 198 198 198 198 198
## Canada China HongKong India Korea Mexico
## 198 197 198 199 198 198
## SouthAfrica Singapore Denmark Japan Malaysia Norway
## 198 198 198 198 198 198
## Sweden SriLanka Switzerland Taiwan Thailand
## 198 198 198 201 198
# Delete NA Values
daily = na.omit(daily)
ggplot(data= daily, aes(x= Date, y= India)) + geom_line(color='royalblue', size =0.1)
ggplot() +
geom_line(data= daily, aes(x= Date, y= India, color='India'), size=0.25) +
geom_line(data= daily, aes(x= Date, y= SriLanka, color='SriLanka'), size=0.25) +
ylab('Indian Rupee vs Sri Lankan Rupee') +
scale_color_discrete(name = "Currency: Rupee", labels = c("India", "Sri Lanka")) +
theme_minimal()
ggplot(data= daily, aes(x= Date, y= India)) +
geom_point(color='royalblue', size=0.1, alpha=0.25) +
geom_smooth(method = 'loess', formula = y ~ x, se = F,
color='slategrey', size=0.25) +
ylab('Indian Rupee')
with(daily, plot(Date, India, pch = 1, col = "blue"))
abline(lm(formula = India~Date, data = daily))
with(daily, plot(Date, Euro, pch = 1, col = "red"))
abline(lm(formula = Euro~Date, data = daily))
plot_ly(daily, x= ~Date, y= ~India, type='scatter', mode='lines', name= 'India') %>%
add_trace(y= ~SriLanka, type='scatter', mode='lines', name= 'Sri Lanka') %>%
add_trace(y= ~Euro, type='scatter', mode='lines', name= 'Euro') %>%
layout(yaxis = list(type = "log", title='Currency'))
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
plot_ly(daily, x= ~Date) %>%
add_trace(y= ~Australia, type='scatter', mode='lines', name= 'Australia') %>%
add_trace(y= ~Euro, type='scatter', mode='lines', name= 'Euro') %>%
add_trace(y= ~UK, type='scatter', mode='lines', name= 'UK') %>%
add_trace(y= ~Canada, type='scatter', mode='lines', name= 'Canada') %>%
add_trace(y= ~Japan, type='scatter', mode='lines', name= 'Japan') %>%
add_trace(y= ~Korea, type='scatter', mode='lines', name= 'Korea') %>%
layout(yaxis = list(type = "log", title='Currency'))
Time Series analysis can be classified as:
Techniques used for time series analysis:
Stationary Data Stationarity means that the statistical properties of a process generating a time series do not change over time . It does not mean that the series does not change over time, just that the way it changes does not itself change over time. It means that the properties of the series doesn’t depend on the time when it is captured. A white noise series and series with cyclic behavior can also be considered as stationary series.
Univariate Data
df = ts(data = daily, start = c(2000, 01), end= c(2019, 12), frequency = 12)
ts.plot(daily[, c(3, 5, 7, 8, 11)], col = 1:5, log='y')
legend("right",
legend = c(names(daily[, c(3, 5, 7, 8, 11)])),
col= 1:5, lty=1:5, lwd = 5)
india = ts(data = daily[, 11], start = c(2000, 01), end = c(2019, 12), frequency = 12)
india
## Jan Feb Mar Apr May Jun Jul Aug Sep
## 2000 1128.00 1122.50 1135.00 1146.50 1138.00 1133.50 1147.00 1144.50 1135.50
## 2001 1126.00 1128.00 1124.50 1127.00 1127.80 1125.00 1121.00 1124.00 1127.00
## 2002 1129.60 1129.50 1124.00 1121.00 1115.00 1128.00 1128.00 1126.00 1128.00
## 2003 1144.00 1137.00 1135.00 1131.31 1128.50 1120.50 1120.50 1119.30 1118.70
## 2004 1121.00 1118.40 1118.50 1118.00 1117.30 1118.18 1117.00 1113.00 1110.00
## 2005 1109.40 1107.50 1105.50 1113.00 1114.00 1115.50 1112.00 1107.50 1110.50
## 2006 1111.20 1113.80 1110.00 1109.50 1109.00 1108.50 1108.80 1108.50 1108.10
## 2007 1110.10 1110.00 1111.50 1111.00 1109.70 1109.50 1109.20 1113.00 1113.50
## 2008 1118.80 1122.50 1131.50 1135.00 1133.50 1129.90 1137.50 1137.00 1130.00
## 2009 1117.00 1113.20 1116.00 1115.50 1113.40 1114.40 1114.50 1115.00 1116.40
## 2010 1118.80 1119.50 1118.20 1118.30 1116.50 1115.00 1115.15 1113.90 1117.50
## 2011 1116.70 1115.70 1111.30 1113.00 1113.10 1113.70 1112.30 1111.60 1113.00
## 2012 1115.00 1117.00 1116.90 1115.80 1115.80 1115.00 1116.00 1118.00 1116.50
## 2013 1117.00 1116.30 1115.20 1115.00 1115.10 1114.50 1114.14 1114.80 1114.20
## 2014 1108.60 1108.80 1105.70 1105.50 1107.40 1111.00 1109.50 1109.50 1109.50
## 2015 1140.00 1127.00 1127.00 1128.00 1135.00 1126.00 1120.00 1114.00 1115.90
## 2016 1120.00 1120.00 1119.00 1121.90 1124.00 1124.00 1132.00 1130.00 1136.00
## 2017 1141.00 1138.00 1140.00 1140.00 1138.00 1140.00 1139.00 1138.30 1134.80
## 2018 1136.50 1132.00 1137.00 1141.00 1139.00 1138.70 1141.30 1144.50 1157.00
## 2019 1189.00 1189.00 1204.00 1217.00 1211.00 1220.00 1204.00 1201.00 1201.00
## Oct Nov Dec
## 2000 1125.00 1127.00 1134.00
## 2001 1131.00 1130.50 1130.00
## 2002 1129.00 1135.00 1136.00
## 2003 1119.20 1123.00 1120.20
## 2004 1108.70 1112.10 1112.50
## 2005 1110.00 1108.50 1107.00
## 2006 1111.00 1110.00 1110.00
## 2007 1114.90 1115.00 1115.60
## 2008 1133.00 1125.20 1118.00
## 2009 1122.22 1120.00 1119.30
## 2010 1117.50 1119.00 1118.50
## 2011 1114.20 1116.00 1115.60
## 2012 1115.30 1114.50 1116.00
## 2013 1114.10 1111.90 1110.20
## 2014 1110.00 1115.20 1119.90
## 2015 1115.30 1118.18 1120.00
## 2016 1140.00 1140.00 1132.00
## 2017 1136.30 1135.00 1139.00
## 2018 1168.00 1179.00 1191.00
## 2019 1192.00 1184.00 1191.00
plot(india, ylab = 'Rupees', main="Indian Rupee growth 2000 through 2019")
india_components = decompose(india)
plot(india_components)
It tests whether a time series variable is non-stationary and possesses a unit root. The null hypothesis is generally defined as the presence of a unit root and the alternative hypothesis is either stationarity, trend stationarity or explosive root depending on the test used. It’s a feature to check stochastic processes, such as random walks, that can cause problems in statistical inference involving time series models.
# install.packages("fUnitRoots")
library("fUnitRoots")
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics
urkpssTest(india, type = c("tau"), lags = c("short"),use.lag = NULL, doplot = TRUE)
##
## Title:
## KPSS Unit Root Test
##
## Test Results:
## NA
##
## Description:
## Wed Aug 19 10:31:52 2020 by user:
tsstationary = diff(india, differences=1)
plot(tsstationary)
Correlation is a statistical technique that can show whether and how strongly pairs of variables are related. ### Covariance Covariance is a measure of the joint variability of two random variables. If the greater values of one variable mainly correspond with the greater values of the other variable, and the same holds for the lesser values, the covariance is positive. ### Partial Correlation In time series analysis, the PACF gives the partial correlation of a stationary time series with its own lagged values, regressed the values of the time series at all shorter lags. It contrasts with the autocorrelation function, which does not control for other lags.
acf(india, plot=F)
##
## Autocorrelations of series 'india', by lag
##
## 0.0000 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333
## 1.000 0.956 0.915 0.870 0.816 0.756 0.693 0.619 0.557 0.493 0.437
## 0.9167 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 1.7500
## 0.397 0.359 0.322 0.296 0.275 0.261 0.257 0.254 0.246 0.241 0.234
## 1.8333 1.9167
## 0.229 0.228
acf(india, type = 'covariance', plot=F)
##
## Autocovariances of series 'india', by lag
##
## 0.0000 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333
## 442 422 404 384 361 334 306 274 246 218 193
## 0.9167 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 1.7500
## 175 159 142 131 122 116 113 112 109 106 103
## 1.8333 1.9167
## 101 101
acf(india, type = 'covariance', plot=F)
##
## Autocovariances of series 'india', by lag
##
## 0.0000 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333
## 442 422 404 384 361 334 306 274 246 218 193
## 0.9167 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 1.7500
## 175 159 142 131 122 116 113 112 109 106 103
## 1.8333 1.9167
## 101 101
acf(india, type = 'covariance')
pacf(india, plot=F)
##
## Partial autocorrelations of series 'india', by lag
##
## 0.0833 0.1667 0.2500 0.3333 0.4167 0.5000 0.5833 0.6667 0.7500 0.8333 0.9167
## 0.956 0.012 -0.071 -0.123 -0.121 -0.062 -0.150 0.091 -0.024 0.075 0.158
## 1.0000 1.0833 1.1667 1.2500 1.3333 1.4167 1.5000 1.5833 1.6667 1.7500 1.8333
## 0.003 -0.028 0.002 0.031 0.014 0.063 0.022 -0.085 -0.021 -0.024 0.018
## 1.9167
## 0.057
pacf(india)
timeseriesseasonallyadjusted <- india- india_components$seasonal
tsstationary <- diff(timeseriesseasonallyadjusted)
plot(tsstationary)
acf(tsstationary)
acf(tsstationary, type='covariance')
pacf(tsstationary)
Order specifies the non-seasonal part of the ARIMA model: (p, d, q) refers to the AutoRegression order, the degree of difference, and the MovingAverage order.
Seasonal specifies the seasonal part of the ARIMA model, plus the period (which defaults to frequency(x) i.e 12 in this case). This function requires a list with components order and period, but given a numeric vector of length 3, it turns them into a suitable list with the specification as the ‘order’.
Method refers to the fitting method, which can be ‘maximum likelihood (ML)’ or ‘minimize conditional sum-of-squares (CSS)’. The default is conditional-sum-of-squares.
fitARIMA <- arima(india,
order=c(1,1,1),
seasonal = list(order = c(1,0,0), period = 12),
method="ML")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:timeSeries':
##
## time<-
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
coeftest(fitARIMA)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.263108 0.830011 -0.3170 0.7512
## ma1 0.290375 0.825234 0.3519 0.7249
## sar1 -0.080456 0.078604 -1.0236 0.3060
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
confint(fitARIMA)
## 2.5 % 97.5 %
## ar1 -1.8898999 1.36368472
## ma1 -1.3270534 1.90780339
## sar1 -0.2345172 0.07360442
predict(fitARIMA, n.ahead = 20)
## $pred
## Jan Feb Mar Apr May Jun Jul Aug
## 2020 1191.416 1191.349 1190.159 1189.109 1189.593 1188.868 1190.156 1190.397
## 2021 1191.168 1191.174 1191.269 1191.354 1191.315 1191.373 1191.270 1191.250
## Sep Oct Nov Dec
## 2020 1190.397 1191.121 1191.765 1191.202
## 2021
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 2020 4.597650 6.591302 8.089602 9.355187 10.467853 11.473361 12.397524
## 2021 16.814457 17.359770 17.889109 18.403064 18.903093 19.390220 19.865409
## Aug Sep Oct Nov Dec
## 2020 13.257434 14.064865 14.828396 15.554491 16.248172
## 2021 20.329493
plot(forecast(fitARIMA, h = 20, level=c(99.5)))
auto = auto.arima(india, trace=TRUE)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,1,2)(1,0,1)[12] with drift : 1382.349
## ARIMA(0,1,0) with drift : 1409.073
## ARIMA(1,1,0)(1,0,0)[12] with drift : 1377.548
## ARIMA(0,1,1)(0,0,1)[12] with drift : 1412.026
## ARIMA(0,1,0) : 1407.821
## ARIMA(1,1,0) with drift : 1410.377
## ARIMA(1,1,0)(2,0,0)[12] with drift : 1388.869
## ARIMA(1,1,0)(1,0,1)[12] with drift : 1379.26
## ARIMA(1,1,0)(0,0,1)[12] with drift : 1411.51
## ARIMA(1,1,0)(2,0,1)[12] with drift : 1388.744
## ARIMA(0,1,0)(1,0,0)[12] with drift : 1374.78
## ARIMA(0,1,0)(2,0,0)[12] with drift : 1385.995
## ARIMA(0,1,0)(1,0,1)[12] with drift : 1376.453
## ARIMA(0,1,0)(0,0,1)[12] with drift : 1410.051
## ARIMA(0,1,0)(2,0,1)[12] with drift : 1385.974
## ARIMA(0,1,1)(1,0,0)[12] with drift : 1376.631
## ARIMA(1,1,1)(1,0,0)[12] with drift : 1378.26
## ARIMA(0,1,0)(1,0,0)[12] : 1373.959
## ARIMA(0,1,0)(2,0,0)[12] : 1385.048
## ARIMA(0,1,0)(1,0,1)[12] : 1375.566
## ARIMA(0,1,0)(0,0,1)[12] : 1408.914
## ARIMA(0,1,0)(2,0,1)[12] : 1385.56
## ARIMA(1,1,0)(1,0,0)[12] : 1376.57
## ARIMA(0,1,1)(1,0,0)[12] : 1375.723
## ARIMA(1,1,1)(1,0,0)[12] : 1376.922
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(0,1,0)(1,0,0)[12] : 1411.776
##
## Best model: ARIMA(0,1,0)(1,0,0)[12]
confint(auto)
## 2.5 % 97.5 %
## sar1 -0.2367748 0.0698121
predict(auto, n.ahead = 20)
## $pred
## Jan Feb Mar Apr May Jun Jul Aug
## 2020 1191.167 1191.167 1189.915 1188.829 1189.330 1188.579 1189.915 1190.165
## 2021 1190.986 1190.986 1191.091 1191.181 1191.139 1191.202 1191.091 1191.070
## Sep Oct Nov Dec
## 2020 1190.165 1190.917 1191.584 1191.000
## 2021
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 2020 4.609598 6.518956 7.984057 9.219195 10.307374 11.291162 12.195849
## 2021 16.517550 17.049289 17.564938 18.065875 18.553291 19.028226 19.491593
## Aug Sep Oct Nov Dec
## 2020 13.037911 13.828793 14.576828 15.288306 15.968115
## 2021 19.944196
plot(forecast(auto, h = 20, level = 99.5))
acf(fitARIMA$residuals)
library(FitAR)
## Loading required package: lattice
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
##
## Attaching package: 'FitAR'
## The following object is masked from 'package:forecast':
##
## BoxCox
boxresult = LjungBoxTest (fitARIMA$residuals,k=2,StartLag=1)
plot(boxresult[,3],main= "Ljung-Box Q Test", ylab= "P-values", xlab= "Lag")
qqnorm(fitARIMA$residuals)
qqline(fitARIMA$residuals)